# -*- coding: utf-8 -*-
"""
Sort Analyzer
"""

import string
import PyLucene

class NXAsciiFilter(object):
    """Token filter that folds accented Latin characters to plain ASCII.

    Wraps another token stream. The text of every token pulled through
    ``next`` is encoded to ISO-8859-15 and passed through ``toAscii``.
    """

    # The accented characters are written as ISO-8859-15 byte escapes rather
    # than as literal characters, so this file stays pure ASCII and the table
    # cannot be damaged when the file is re-encoded.
    # Upper case: A-grave .. Y-acute (27 characters).
    # Lower case: a-grave .. y-diaeresis (28 characters).
    _ACCENTED_CHARS = (
        '\xc0\xc1\xc2\xc3\xc4\xc5\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf'
        '\xd1\xd2\xd3\xd4\xd5\xd6\xd8\xd9\xda\xdb\xdc\xdd'
        '\xe0\xe1\xe2\xe3\xe4\xe5\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef'
        '\xf1\xf2\xf3\xf4\xf5\xf6\xf8\xf9\xfa\xfb\xfc\xfd\xff')
    _ASCII_CHARS = 'AAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy'

    # One-to-one character table used by ``toAscii``.
    # ``string.maketrans`` only exists on Python 2; interpreters without it
    # provide the equivalent as ``str.maketrans``.
    ACCENTED_CHARS_TRANSLATIONS = (
        getattr(string, 'maketrans', None) or str.maketrans
    )(_ACCENTED_CHARS, _ASCII_CHARS)

    # Characters that expand to two ASCII letters; a one-to-one table cannot
    # express these, so they are replaced separately.
    _MULTI_CHAR_TRANSLATIONS = (
        ('\xc6', 'AE'),  # AE ligature
        ('\xe6', 'ae'),  # ae ligature
        ('\xbc', 'OE'),  # OE ligature (0xBC in ISO-8859-15)
        ('\xbd', 'oe'),  # oe ligature (0xBD in ISO-8859-15)
        ('\xdf', 'ss'),  # sharp s
    )

    def __init__(self, tokenStream):
        """Store *tokenStream* as the upstream source of tokens."""
        self.input = tokenStream

    def toAscii(self, s):
        """Change accented and special characters by ASCII characters.

        *s* is expected to be ISO-8859-15 encoded text, which is what
        ``next`` passes in. Characters without an entry in the tables are
        returned unchanged.
        """
        s = s.translate(self.ACCENTED_CHARS_TRANSLATIONS)
        for char, replacement in self._MULTI_CHAR_TRANSLATIONS:
            s = s.replace(char, replacement)
        return s

    def next(self):
        """Return the next upstream token with its text folded to ASCII.

        Returns None when the wrapped stream returns None, and also when a
        token has empty text. Text that cannot be converted to ISO-8859-15
        is passed through unchanged.
        """
        token = self.input.next()
        if token is None:
            return None

        ttext = token.termText()

        if not ttext:
            # NOTE(review): this returns the same value as an exhausted
            # input; confirm that stopping on an empty token is intended.
            return None

        try:
            encoded = str(ttext.encode('ISO-8859-15'))
        except UnicodeError:
            # Best effort: keep the original text. UnicodeError also covers
            # the UnicodeDecodeError that Python 2 raises when ``encode`` is
            # called on a non-ASCII byte string.
            pass
        else:
            ttext = self.toAscii(encoded)

        return PyLucene.Token(ttext, token.startOffset(),
                              token.endOffset(), token.type())

class NXSortAnalyzer(object):
    """Analyzer dedicated to fields that are indexed for sorting only.

    The token stream it builds is a ``PyLucene.StandardTokenizer`` wrapped
    in an ``NXAsciiFilter`` and then in a ``PyLucene.LowerCaseFilter``.
    """

    def tokenStream(self, fieldName, reader):
        """Return the filter chain that reads its tokens from *reader*.

        *fieldName* is accepted but not used.
        """
        tokenizer = PyLucene.StandardTokenizer(reader)
        ascii_folded = NXAsciiFilter(tokenizer)
        return PyLucene.LowerCaseFilter(ascii_folded)
